import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
df = pd.read_csv("C:/Users/rushi/OneDrive/Desktop/Personal Project/IPL Aution Analysis-2023/IPL_2023-22_Sold_Players.csv")
df
| Season | Name | Nationality | Type | Team | Price | |
|---|---|---|---|---|---|---|
| 0 | 2023 | Ajinkya Rahane | Indian | Batter | Chennai Super Kings | 50,00,000 |
| 1 | 2023 | Bhagath Varma | Indian | All-Rounder | Chennai Super Kings | 20,00,000 |
| 2 | 2023 | Kyle Jamieson | Overseas | Bowler | Chennai Super Kings | 1,00,00,000 |
| 3 | 2023 | Ajay Mandal | Indian | All-Rounder | Chennai Super Kings | 20,00,000 |
| 4 | 2023 | Nishant Sindhu | Indian | All-Rounder | Chennai Super Kings | 60,00,000 |
| ... | ... | ... | ... | ... | ... | ... |
| 279 | 2022 | Fazalhaq Farooqi | Overseas | Bowler | Sunrisers Hyderabad | 50,00,000 |
| 280 | 2022 | Sean Abbott | Overseas | Bowler | Sunrisers Hyderabad | 2,40,00,000 |
| 281 | 2022 | R Samarth | Indian | Batsman | Sunrisers Hyderabad | 20,00,000 |
| 282 | 2022 | Shashank Singh | Indian | All-Rounder | Sunrisers Hyderabad | 20,00,000 |
| 283 | 2022 | Saurabh Dubey | Indian | Bowler | Sunrisers Hyderabad | 20,00,000 |
284 rows × 6 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 284 entries, 0 to 283 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Season 284 non-null int64 1 Name 284 non-null object 2 Nationality 284 non-null object 3 Type 284 non-null object 4 Team 284 non-null object 5 Price 284 non-null object dtypes: int64(1), object(5) memory usage: 13.4+ KB
df['Price'] = df['Price'].apply(lambda x:str(x).replace(',','')).astype('float')
df['Price']
0 5000000.0
1 2000000.0
2 10000000.0
3 2000000.0
4 6000000.0
...
279 5000000.0
280 24000000.0
281 2000000.0
282 2000000.0
283 2000000.0
Name: Price, Length: 284, dtype: float64
for col in df.columns:
print('Number of Unique Values in', col,'are',df[col].nunique())
Number of Unique Values in Season are 2 Number of Unique Values in Name are 266 Number of Unique Values in Nationality are 2 Number of Unique Values in Type are 6 Number of Unique Values in Team are 10 Number of Unique Values in Price are 66
df['Type'].unique()
array(['Batter ', 'All-Rounder ', 'Bowler ', 'Wicket-Keeper ', 'Batsman ',
'Wicket Keeper '], dtype=object)
df['Type']=df['Type'].apply(lambda x:x.replace('Batter ','Batsman'))
df['Type']=df['Type'].apply(lambda x:x.replace('Batsman ','Batsman'))
df['Type']=df['Type'].apply(lambda x:x.replace('All-Rounder ','All-Rounder'))
df['Type']=df['Type'].apply(lambda x:x.replace(' All-Rounder ','All-Rounder'))
df['Type']=df['Type'].apply(lambda x:x.replace(' All-Rounder','All-Rounder'))
df['Type']=df['Type'].apply(lambda x:x.replace('Bowler','Bowler'))
df['Type']=df['Type'].apply(lambda x:x.replace('Bowler ','Bowler'))
df['Type']=df['Type'].apply(lambda x:x.replace(' Bowler','Bowler'))
df['Type']=df['Type'].apply(lambda x:x.replace('Wicket-Keeper ','Wicket-Keeper'))
df['Type']=df['Type'].apply(lambda x:x.replace('Wicket Keeper ','Wicket-Keeper'))
df['Type']=df['Type'].apply(lambda x:x.replace(' Wicket Keeper','Wicket-Keeper'))
df['Type']=df['Type'].apply(lambda x:x.replace('Wicket Keeper','Wicket-Keeper'))
df['Type'].unique()
array(['Batsman', 'All-Rounder', 'Bowler', 'Wicket-Keeper'], dtype=object)
df.duplicated().sum()
0
df['Nationality'].value_counts()
Indian 188 Overseas 96 Name: Nationality, dtype: int64
418 VALUES RE NOT AVIALABLE WE CANNOT REPLCE WITH ANY OTHER VALUES SO,JUST KEEPING IT AS SAME AND IT IS NOT IDENTIFIED AS A NULL VALUE
for col in df.columns:
print('No. of Null Values in',col, 'are', df[col].isnull().sum() )
No. of Null Values in Season are 0 No. of Null Values in Name are 0 No. of Null Values in Nationality are 0 No. of Null Values in Type are 0 No. of Null Values in Team are 0 No. of Null Values in Price are 0
df.groupby('Nationality')['Price'].mean()
Nationality Indian 1.894415e+07 Overseas 3.776562e+07 Name: Price, dtype: float64
OVERSEAS PLAYERS GETTING HIGH PAY COMPARED TOOUR INDIAN PLAYERS
df['Nationality'] = df['Nationality'].apply(lambda x:'na' if "Not Available" in x else x)
SWARMPLOT IS LIKE SCATTER PLOT SO, WE CAN OBSERVE HOW DATA POINTS ARE DISTRIBUTED
sns.swarmplot(x = "Nationality", y="Price", data=df)
C:\Users\rushi\anaconda3\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 45.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
<AxesSubplot:xlabel='Nationality', ylabel='Price'>
df = df.sort_values(by='Price',ascending=False)
df.head(10)
| Season | Name | Nationality | Type | Team | Price | |
|---|---|---|---|---|---|---|
| 49 | 2023 | Sam Curran | Overseas | All-Rounder | Punjab Kings | 185000000.0 |
| 40 | 2023 | Cameron Green | Overseas | All-Rounder | Mumbai Indians | 175000000.0 |
| 6 | 2023 | Ben Stokes | Overseas | All-Rounder | Chennai Super Kings | 162500000.0 |
| 28 | 2023 | Nicholas Pooran | Overseas | Wicket-Keeper | Lucknow Super Giants | 160000000.0 |
| 180 | 2022 | Ishan Kishan | Indian | Wicket-Keeper | Mumbai Indians | 152500000.0 |
| 83 | 2022 | Deepak Chahar | Indian | Bowler | Chennai Super Kings | 140000000.0 |
| 68 | 2023 | Harry Brook | Overseas | Batsman | Sunrisers Hyderabad | 132500000.0 |
| 142 | 2022 | Shreyas Iyer | Indian | Batsman | Kolkata Knight Riders | 122500000.0 |
| 210 | 2022 | Liam Livingstone | Overseas | All-Rounder | Punjab Kings | 115000000.0 |
| 246 | 2022 | Wanindu Hasaranga | Overseas | All-Rounder | Royal Challengers Bangalore | 107500000.0 |
THESE ARE THE TOP PLAYERS IN IPL
SAM CURRAN IS THE HIGHEST PAID PLAYER IN IPL HISTORY FROM 2013 TO 2023
df.groupby('Type')['Price'].mean()
Type All-Rounder 2.680189e+07 Batsman 2.726042e+07 Bowler 2.166667e+07 Wicket-Keeper 2.816176e+07 Name: Price, dtype: float64
WICKET KEEPERS AND ALL ROUNDERS ARE GETTING NEARLY EQUAL AMOUNT OF AVERAGE PAY of 2 CRORES
top_10_paid = (df.groupby('Name')['Price'].mean().sort_values(ascending=False)).head(10)
top_10_paid
Name Sam Curran 185000000.0 Cameron Green 175000000.0 Ben Stokes 162500000.0 Ishan Kishan 152500000.0 Deepak Chahar 140000000.0 Nicholas Pooran 133750000.0 Harry Brook 132500000.0 Shreyas Iyer 122500000.0 Liam Livingstone 115000000.0 Harshal Patel 107500000.0 Name: Price, dtype: float64
fig = px.bar(x=top_10_paid.index,
y=top_10_paid.values,
color=top_10_paid.index,
text=top_10_paid.values,
title='Top_Paid_Cricketers')
fig.update_layout(xaxis_title='Players',yaxis_title='Amount')
features = ['Name', 'Nationality', 'Type', 'Team']
for feature in features:
Contineuts_population=df.groupby(by=feature)['Price'].mean().sort_values(ascending=False).head(7)
fig = px.bar(x=Contineuts_population.index,
y=Contineuts_population.values,
color=Contineuts_population,
text=Contineuts_population.values,
color_discrete_sequence=px.colors.sequential.PuBu,
template='plotly_dark')
fig.update_layout(xaxis_title=feature,yaxis_title='Payment paid')
fig.show()
WE CAN ANALSE
1)FOREIGN PLAYERS PAID HIGH COMPARED TO INDIAN PLAYERS
2)BOWLERS ARE GETTING LOW PAY COMPARING TO REMAINING
3)PUNJAB KINGS UTILISED MOST OF IT'S MONEY ON PLAYERS
lowest_paid_players=(df.groupby('Name')['Price'].mean().sort_values(ascending=True)).head(10)
lowest_paid_players
Name Abdul P A 2000000.0 Mukesh Choudhary 2000000.0 Mohsin Khan 2000000.0 Mohit Rathee 2000000.0 Mohd. Arshad Khan 2000000.0 Tejas Baroka 2000000.0 Mayank Yadav 2000000.0 Yudhvir Charak 2000000.0 Manoj Bhandage 2000000.0 Manan Vohra 2000000.0 Name: Price, dtype: float64
fig=px.bar(x=lowest_paid_players.index,
y=lowest_paid_players.values,
color= lowest_paid_players.index,
text= lowest_paid_players.values,
title= 'Lowest Paid Players')
fig.update_layout(xaxis_title='Players', yaxis_title='Amount')
fig=px.bar(x=lowest_paid_players.index,
y= lowest_paid_players.values,
color= lowest_paid_players.index,
text= lowest_paid_players.values,
title=' lowest_paid_players')
fig.update_layout(xaxis_title='Players',yaxis_title='Amount')